{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<img src=\"https://certificate.tpq.io/taim_logo.png\" width=\"350px\" align=\"right\">"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Artificial Intelligence in Finance"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Recurrent Neural Networks"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Dr Yves J Hilpisch | The AI Machine\n",
    "\n",
    "http://aimachine.io | http://twitter.com/dyjh"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## First Example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import random\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import tensorflow as tf\n",
    "from pprint import pprint\n",
    "from pylab import plt, mpl\n",
    "plt.style.use('seaborn')\n",
    "mpl.rcParams['savefig.dpi'] = 300\n",
    "mpl.rcParams['font.family'] = 'serif'\n",
    "pd.set_option('precision', 4)\n",
    "np.set_printoptions(suppress=True, precision=4)\n",
    "os.environ['PYTHONHASHSEED'] = '0'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def set_seeds(seed=100):\n",
    "    random.seed(seed)\n",
    "    np.random.seed(seed)\n",
    "    tf.random.set_seed(seed)\n",
    "set_seeds()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = np.arange(100)\n",
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = a.reshape((len(a), -1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from keras.preprocessing.sequence import TimeseriesGenerator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lags = 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "g = TimeseriesGenerator(a, a, length=lags, batch_size=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pprint(list(g)[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from keras.models import Sequential\n",
    "from keras.layers import SimpleRNN, LSTM, Dense"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = Sequential()\n",
    "model.add(SimpleRNN(100, activation='relu',\n",
    "                    input_shape=(lags, 1)))\n",
    "model.add(Dense(1, activation='linear'))\n",
    "model.compile(optimizer='adagrad', loss='mse',\n",
    "              metrics=['mae'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "h = model.fit(g, epochs=1000, steps_per_epoch=5,\n",
    "            verbose=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "res = pd.DataFrame(h.history)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "res.tail(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "res.iloc[10:].plot(figsize=(10, 6), style=['--', '--']);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = np.array([21, 22, 23]).reshape((1, lags, 1))\n",
    "y = model.predict(x, verbose=False)\n",
    "int(round(y[0, 0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = np.array([87, 88, 89]).reshape((1, lags, 1))\n",
    "y = model.predict(x, verbose=False)\n",
    "int(round(y[0, 0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = np.array([187, 188, 189]).reshape((1, lags, 1))\n",
    "y = model.predict(x, verbose=False)\n",
    "int(round(y[0, 0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = np.array([1187, 1188, 1189]).reshape((1, lags, 1))\n",
    "y = model.predict(x, verbose=False)\n",
    "int(round(y[0, 0]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Second Example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def transform(x):\n",
    "    y = 0.05 * x ** 2 + 0.2 * x + np.sin(x) + 5\n",
    "    y += np.random.standard_normal(len(x)) * 0.2\n",
    "    return y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = np.linspace(-2 * np.pi, 2 * np.pi, 500)\n",
    "a = transform(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(10, 6))\n",
    "plt.plot(x, a);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = a.reshape((len(a), -1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lags = 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "g = TimeseriesGenerator(a, a, length=lags, batch_size=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = Sequential()\n",
    "model.add(SimpleRNN(500, activation='relu', input_shape=(lags, 1)))\n",
    "model.add(Dense(1, activation='linear'))\n",
    "model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "model.fit(g, epochs=500,\n",
    "          steps_per_epoch=10,\n",
    "          verbose=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = np.linspace(-6 * np.pi, 6 * np.pi, 1000)\n",
    "d = transform(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "g_ = TimeseriesGenerator(d, d, length=lags, batch_size=len(d))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "f = list(g_)[0][0].reshape((len(d) - lags, lags, 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = model.predict(f, verbose=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(10, 6))\n",
    "plt.plot(x[lags:], d[lags:], label='data', alpha=0.75)\n",
    "plt.plot(x[lags:], y, 'r.', label='pred', ms=3)\n",
    "plt.axvline(-2 * np.pi, c='g', ls='--')\n",
    "plt.axvline(2 * np.pi, c='g', ls='--')\n",
    "plt.text(-15, 22, 'out-of-sample')\n",
    "plt.text(-2, 22, 'in-sample')\n",
    "plt.text(10, 22, 'out-of-sample')\n",
    "plt.legend();"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Financial Price Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "url = 'http://hilpisch.com/aiif_eikon_id_eur_usd.csv'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "symbol = 'EUR_USD'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "raw = pd.read_csv(url, index_col=0, parse_dates=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_data():\n",
    "    data = pd.DataFrame(raw['CLOSE'])\n",
    "    data.columns = [symbol]\n",
    "    data = data.resample('30min', label='right').last().ffill()\n",
    "    return data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = generate_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = (data - data.mean()) / data.std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "p = data[symbol].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "p = p.reshape((len(p), -1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lags = 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "g = TimeseriesGenerator(p, p, length=lags, batch_size=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_rnn_model(hu=100, lags=lags, layer='SimpleRNN',\n",
    "                           features=1, algorithm='estimation'):\n",
    "    model = Sequential()\n",
    "    if layer == 'SimpleRNN':\n",
    "        model.add(SimpleRNN(hu, activation='relu',\n",
    "                            input_shape=(lags, features)))\n",
    "    else:\n",
    "        model.add(LSTM(hu, activation='relu',\n",
    "                       input_shape=(lags, features)))\n",
    "    if algorithm == 'estimation':\n",
    "        model.add(Dense(1, activation='linear'))\n",
    "        model.compile(optimizer='adam', loss='mse', metrics=['mae'])\n",
    "    else:\n",
    "        model.add(Dense(1, activation='sigmoid'))\n",
    "        model.compile(optimizer='adam', loss='binary_crossentropy',\n",
    "                      metrics=['accuracy'])\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = create_rnn_model()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "model.fit(g, epochs=500, steps_per_epoch=10,\n",
    "          verbose=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = model.predict(g, verbose=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data['pred'] = np.nan\n",
    "data['pred'].iloc[lags:] = y.flatten()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data[[symbol, 'pred']].plot(\n",
    "            figsize=(10, 6), style=['b', 'r-.'],\n",
    "            alpha=0.75);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data[[symbol, 'pred']].iloc[50:100].plot(\n",
    "            figsize=(10, 6), style=['b', 'r-.'],\n",
    "            alpha=0.75);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Financial Return Series "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = generate_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data['r'] = np.log(data / data.shift(1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.dropna(inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = (data - data.mean()) / data.std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "r = data['r'].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "r = r.reshape((len(r), -1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "g = TimeseriesGenerator(r, r, length=lags, batch_size=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = create_rnn_model()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "model.fit(g, epochs=500, steps_per_epoch=10,\n",
    "          verbose=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = model.predict(g, verbose=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data['pred'] = np.nan\n",
    "data['pred'].iloc[lags:] = y.flatten()\n",
    "data.dropna(inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data[['r', 'pred']].iloc[50:100].plot(\n",
    "            figsize=(10, 6), style=['b', 'r-.'],\n",
    "            alpha=0.75);\n",
    "plt.axhline(0, c='grey', ls='--');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import accuracy_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "accuracy_score(np.sign(data['r']), np.sign(data['pred']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "split = int(len(r) * 0.8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train = r[:split]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test = r[split:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "g = TimeseriesGenerator(train, train, length=lags, batch_size=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "set_seeds()\n",
    "model = create_rnn_model(hu=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "model.fit(g, epochs=100, steps_per_epoch=10, verbose=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "g_ = TimeseriesGenerator(test, test, length=lags, batch_size=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = model.predict(g_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "accuracy_score(np.sign(test[lags:]), np.sign(y))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Financial Features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = generate_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data['r'] = np.log(data / data.shift(1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "window = 20\n",
    "data['mom'] = data['r'].rolling(window).mean()\n",
    "data['vol'] = data['r'].rolling(window).std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.dropna(inplace=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Estimation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "split = int(len(data) * 0.8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train = data.iloc[:split].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mu, std = train.mean(), train.std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train = (train - mu) / std"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test = data.iloc[split:].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test = (test - mu) / std"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "g = TimeseriesGenerator(train.values, train['r'].values,\n",
    "                        length=lags, batch_size=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "set_seeds()\n",
    "model = create_rnn_model(hu=100, features=len(data.columns),\n",
    "                         layer='SimpleRNN')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "model.fit(g, epochs=100, steps_per_epoch=10,\n",
    "                verbose=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "g_ = TimeseriesGenerator(test.values, test['r'].values,\n",
    "                         length=lags, batch_size=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = model.predict(g_).flatten()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "accuracy_score(np.sign(test['r'].iloc[lags:]), np.sign(y))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Classification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "set_seeds()\n",
    "model = create_rnn_model(hu=50,\n",
    "            features=len(data.columns),\n",
    "            layer='LSTM',\n",
    "            algorithm='classification')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_y = np.where(train['r'] > 0, 1, 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.bincount(train_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def cw(a):\n",
    "    c0, c1 = np.bincount(a)\n",
    "    w0 = (1 / c0) * (len(a)) / 2\n",
    "    w1 = (1 / c1) * (len(a)) / 2\n",
    "    return {0: w0, 1: w1}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "g = TimeseriesGenerator(train.values, train_y,\n",
    "                        length=lags, batch_size=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "model.fit(g, epochs=5, steps_per_epoch=10,\n",
    "          verbose=False, class_weight=cw(train_y))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_y = np.where(test['r'] > 0, 1, 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "g_ = TimeseriesGenerator(test.values, test_y,\n",
    "                         length=lags, batch_size=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = np.where(model.predict(g_, batch_size=None) > 0.5,\n",
    "             1, 0).flatten()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.bincount(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "accuracy_score(test_y[lags:], y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Deep RNNs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from keras.layers import Dropout"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_deep_rnn_model(hl=2, hu=100, layer='SimpleRNN',\n",
    "                          optimizer='rmsprop', features=1,\n",
    "                          dropout=False, rate=0.3, seed=100):\n",
    "    if hl <= 2: hl = 2\n",
    "    if layer == 'SimpleRNN':\n",
    "        layer = SimpleRNN\n",
    "    else:\n",
    "        layer = LSTM\n",
    "    model = Sequential()\n",
    "    model.add(layer(hu, input_shape=(lags, features),\n",
    "                     return_sequences=True,\n",
    "                    ))\n",
    "    if dropout:\n",
    "        model.add(Dropout(rate, seed=seed))\n",
    "    for _ in range(2, hl):\n",
    "        model.add(layer(hu, return_sequences=True))\n",
    "        if dropout:\n",
    "            model.add(Dropout(rate, seed=seed))\n",
    "    model.add(layer(hu))\n",
    "    model.add(Dense(1, activation='sigmoid'))\n",
    "    model.compile(optimizer=optimizer,\n",
    "                  loss='binary_crossentropy',\n",
    "                  metrics=['accuracy'])\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "set_seeds()\n",
    "model = create_deep_rnn_model(\n",
    "            hl=2, hu=50, layer='SimpleRNN',\n",
    "            features=len(data.columns),\n",
    "            dropout=True, rate=0.3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "model.fit(g, epochs=200, steps_per_epoch=10,\n",
    "          verbose=False, class_weight=cw(train_y))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = np.where(model.predict(g_, batch_size=None) > 0.5,\n",
    "             1, 0).flatten()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.bincount(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "accuracy_score(test_y[lags:], y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<img src='http://hilpisch.com/taim_logo.png' width=\"350px\" align=\"right\">\n",
    "\n",
    "<br><br><br><a href=\"http://tpq.io\" target=\"_blank\">http://tpq.io</a> | <a href=\"http://twitter.com/dyjh\" target=\"_blank\">@dyjh</a> | <a href=\"mailto:ai@tpq.io\">ai@tpq.io</a>"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
